Estadística y Manejo de Datos con R (EMDR) — Virtual
¿Para qué elaborar un script?
Automatizar un código que queremos correr múltiples veces.
Crear y guardar código que sirve para múltiples propósitos.
Organizar en módulos editables un proceso largo y complejo que rebasa las capacidades de la línea de comando.
# Style example: when the arguments of a definition do not fit on one line,
# put one argument per line, aligned under the first one, and indent the body
# by two spaces.
long_function_name <- function(a = "a long string",
                               b = "another long string",
                               c = "an even longer string") {
  # The body is intentionally empty; only the layout matters here.
}
# Good
fit_models.R
utility_functions.R
# Bad
foo.r
stuff.r
# Good
day_one
day_1
# Bad
first_day_of_the_month
DayOne
dayone
djm1
# Bad
# Each assignment below reuses a name that base R already provides.
T <- FALSE # rebinds T, the shorthand for TRUE, to FALSE
c <- 10 # stores a number under the name of the base function c()
mean <- function(x) { sum(x) } # replaces mean() with a function that returns the sum
# Good
average <- mean(feet / 12 + inches, na.rm = TRUE)
# Bad
average<-mean(feet/12+inches,na.rm=TRUE)
plot(x) # Good
plot (x) # Bad
plot( x ) # Bad
base::get # Good
base :: get # Bad
Conflictos entre paquetes
(p. ej., summarise de dplyr y summarise de MASS)
Podemos hacer referencia a una función sin cargar la biblioteca de funciones (library) con el operador ::
dplyr::summarise()
MASS::summarise()
# Good
x <- 5
# Bad
x = 5
# Load data ---------------------------
# Run model ---------------------------
# Plot data ---------------------------
# Pedro Coyotl, 29/02/2020. This code is an example!

# Clean up the workspace
rm(list = ls()) # removes the variables in the workspace
graphics.off() # closes every graphics window

# Load libraries or packages (already installed)
library(dplyr)
library(magrittr)
library(tidyr)

# Load the data into the workspace
# TRUE is spelled out because T is an ordinary variable that can be rebound.
datos <- read.table("mis_datos.csv", header = TRUE, sep = ",")

# Analysis
mean(datos$pH)
sd(datos$pH)
plot(datos$pH, datos$dia)
# Mean and standard deviation of pH for each value of mes
datos %>%
  group_by(mes) %>%
  summarise(pH_p = mean(pH), pH_de = sd(pH))
y <- m * x + b
# Evaluate the straight line y = m * x + b.
#
# m: slope.
# x: value(s) at which the line is evaluated.
# b: intercept.
# Returns the value of m * x + b.
recta <- function(m, x, b) {
  m * x + b
}
y <- recta(m = 1, x = 1:10, b = 3)
plot(y)
# General shape of a function definition followed by a call to it.
# NOTE(review): contenido, resultado and the arg.* names look like placeholders
# for the reader to replace; they are not defined in the visible code.
nombre <- function(arg.1, arg.2, arg.n) {
contenido # goes inside the braces
return(resultado)
}
resultado <- nombre(arg.1, arg.2, arg.n) # arguments go between the parentheses
# Add two values.
#
# a, b: values to add; each defaults to 1, so suma() returns 2.
# Returns a + b.
suma <- function(a = 1, b = 1) {
  # Assign with `<-` and avoid reusing the name of the base function c()
  total <- a + b
  return(total)
}
suma()
suma(2, 2)
# if
# The braces run only when the condition is TRUE.
a <- 5 # try with a <- -5
if (a > 0) {
  print("el valor es mayor a zero")
}
## [1] "el valor es mayor a zero"
# else
a <- 5 # try with a <- -5
if (a > 0) {
  print("el valor es mayor a cero")
} else {
  # This branch also runs when a == 0, so the message covers both cases.
  print("el valor es menor o igual a cero")
}
## [1] "el valor es mayor a cero"
# else if
# The second condition is tested only when the first one is FALSE.
a <- 0
if (a > 0) {
  print("el valor es positivo")
} else if (a == 0) {
  print("el valor es cero")
}
## [1] "el valor es cero"
| Símbolo | Operación |
|---|---|
| x == y | equivalencia |
| x != y | diferencia |
| !x | negación lógica |
| x & y | y |
| x \| y | o |
a <- 1
b <- 1:5
# `==` compares a with every element of b and returns one value per element
a == b
## [1] TRUE FALSE FALSE FALSE FALSE
# `%in%` asks whether a occurs anywhere in b
a %in% b
## [1] TRUE
# all()
v1 <- c("A", "B", "C", "D")
v2 <- v1
all(v1 == v2)
## [1] TRUE
v3 <- c("A", "C", "C", "E")
all(v1 == v3)
## [1] FALSE
v4 <- c("A", "B", "A", "B")
v5 <- c("A", "B")
# v5 is shorter than v4 and gets recycled, so every element-wise comparison
# is TRUE even though the vectors differ in length
all(v4 == v5)
## [1] TRUE
# Compare the lengths first; `&&` is the scalar operator and short-circuits
length(v4) == length(v5) && all(v4 == v5)
## [1] FALSE
# Two character vectors of the same length that differ in some positions
v1 <- c("A", "B", "C", "D")
v3 <- c("A", "C", "C", "E")
# identical() gives a single TRUE/FALSE for the comparison as a whole
identical(v1 ,v3)
## [1] FALSE
# which() lists the positions where the element-wise test is TRUE
which(v1 != v3)
## [1] 2 4
union(x, y)
intersect(x, y)
setdiff(y, x)
setequal(x, y)
duplicated(x)
unique(x)
length(), round(), is.numeric(), is.character()
for (i in inicio:fin) {
# operaciones a realizar
}
# Print every integer from 1 to 10, one per iteration.
for (i in seq_len(10)) {
  print(i)
}
## [1] 1 ## [1] 2 ## [1] 3 ## [1] 4 ## [1] 5 ## [1] 6 ## [1] 7 ## [1] 8 ## [1] 9 ## [1] 10
# Loop over a sequence that was built beforehand with seq()
abc <- seq(from = 1, to = 10, by = 1)
for (i in abc) {
  print(i)
}
## [1] 1 ## [1] 2 ## [1] 3 ## [1] 4 ## [1] 5 ## [1] 6 ## [1] 7 ## [1] 8 ## [1] 9 ## [1] 10
abc <- letters[1:10]
# Loop over the positions of abc. seq_along() gives the same indices as
# 1:length(abc), but yields an empty sequence instead of c(1, 0) when abc
# has no elements.
for (i in seq_along(abc)) {
  print(i)
}
## [1] 1 ## [1] 2 ## [1] 3 ## [1] 4 ## [1] 5 ## [1] 6 ## [1] 7 ## [1] 8 ## [1] 9 ## [1] 10
while()
while (condicion == TRUE) {
# operaciones
}
# Count from 1 to 10 with a while loop, printing each value.
i <- 0
while (i < 10) {
i <- i + 1 # the counter does not increase automatically
print(i)
}
## [1] 1 ## [1] 2 ## [1] 3 ## [1] 4 ## [1] 5 ## [1] 6 ## [1] 7 ## [1] 8 ## [1] 9 ## [1] 10
Escribe un loop (una iteración) que imprima todas las letras del abecedario, excepto las vocales - éstas deben ser impresas en mayúsculas.
letters, toupper()
purrr
Instalamos las bibliotecas.
install.packages("purrr")
install.packages("tidyverse")
install.packages("broom")
# Load the libraries (one call per line; several calls on one line do not parse)
library(purrr)
library(tidyverse)
library(broom)
nest() y unnest()
head(mtcars)
?mtcars
# Nest every column except cyl into a list-column called `data`. Naming the
# new column avoids the "All elements of `...` must be named" warning.
n_mtcars <- mtcars %>% nest(data = -cyl) # yields a data frame of lists
n_mtcars
## # A tibble: 3 × 2 ## cyl data ## <dbl> <list> ## 1 6 <tibble [7 × 10]> ## 2 4 <tibble [11 × 10]> ## 3 8 <tibble [14 × 10]>
# unnest()
# `cols` names the list-column to expand; leaving it out raises a warning.
n_mtcars %>% unnest(cols = c(data))
## # A tibble: 32 × 11 ## cyl mpg disp hp drat wt qsec vs am gear carb ## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 6 21 160 110 3.9 2.62 16.5 0 1 4 4 ## 2 6 21 160 110 3.9 2.88 17.0 0 1 4 4 ## 3 6 21.4 258 110 3.08 3.22 19.4 1 0 3 1 ## 4 6 18.1 225 105 2.76 3.46 20.2 1 0 3 1 ## 5 6 19.2 168. 123 3.92 3.44 18.3 1 0 4 4 ## 6 6 17.8 168. 123 3.92 3.44 18.9 1 0 4 4 ## 7 6 19.7 145 175 3.62 2.77 15.5 0 1 5 6 ## 8 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 ## 9 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 ## 10 4 22.8 141. 95 3.92 3.15 22.9 1 0 4 2 ## # … with 22 more rows
# map()
# Fit the linear model mpg ~ wt on one data frame.
#
# x: data frame that holds the columns mpg and wt.
# Returns the fitted lm object.
my_test <- function(x) {
  lm(mpg ~ wt, data = x)
}
# One model per value of cyl: nest the remaining columns, then apply my_test
# to every nested data frame. Naming the nested column avoids the tidyr warning.
mtcars %>%
  nest(data = -cyl) %>%
  mutate(res = map(data, my_test))
## # A tibble: 3 × 3 ## cyl data res ## <dbl> <list> <list> ## 1 6 <tibble [7 × 10]> <lm> ## 2 4 <tibble [11 × 10]> <lm> ## 3 8 <tibble [14 × 10]> <lm>
# map()
# Fit the linear model mpg ~ wt on one data frame.
#
# x: data frame that holds the columns mpg and wt.
# Returns the fitted lm object.
my_test <- function(x) {
  lm(mpg ~ wt, data = x)
}
# Fit one model per value of cyl, then apply glance() to every fitted model.
# Naming the nested column avoids the tidyr warning.
mtcars %>%
  nest(data = -cyl) %>%
  mutate(res = map(data, my_test)) %>%
  mutate(glance_lm = res %>% map(glance))
## # A tibble: 3 × 4 ## cyl data res glance_lm ## <dbl> <list> <list> <list> ## 1 6 <tibble [7 × 10]> <lm> <tibble [1 × 12]> ## 2 4 <tibble [11 × 10]> <lm> <tibble [1 × 12]> ## 3 8 <tibble [14 × 10]> <lm> <tibble [1 × 12]>
# map()
# Fit the linear model mpg ~ wt on one data frame.
#
# x: data frame that holds the columns mpg and wt.
# Returns the fitted lm object.
my_test <- function(x) {
  lm(mpg ~ wt, data = x)
}
# Fit one model per value of cyl, apply glance() to each, and expand the
# glance_lm list-column into regular columns. Naming the nested column avoids
# the tidyr warning.
mtcars %>%
  nest(data = -cyl) %>%
  mutate(res = map(data, my_test)) %>%
  mutate(glance_lm = res %>% map(glance)) %>%
  unnest(glance_lm)
## # A tibble: 3 × 15 ## cyl data res r.squared adj.r.squared sigma statistic p.value df logLik ## <dbl> <lis> <lis> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 6 <tib… <lm> 0.465 0.357 1.17 4.34 0.0918 1 -9.83 ## 2 4 <tib… <lm> 0.509 0.454 3.33 9.32 0.0137 1 -27.7 ## 3 8 <tib… <lm> 0.423 0.375 2.02 8.80 0.0118 1 -28.7 ## # … with 5 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>, ## # df.residual <int>, nobs <int>
# map(), another version.
# Split mtcars into a list with one data frame per value of cyl.
mtcars %>% split(.$cyl) # split() comes from base R
## $`4` ## mpg cyl disp hp drat wt qsec vs am gear carb ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 ## ## $`6` ## mpg cyl disp hp drat wt qsec vs am gear carb ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 ## ## $`8` ## mpg cyl disp hp drat wt qsec vs am gear carb ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 
## 14.60 0 1 5 8
# map(), another version.
# Fit mpg ~ wt separately on each piece of the split.
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .))
## $`4` ## ## Call: ## lm(formula = mpg ~ wt, data = .) ## ## Coefficients: ## (Intercept) wt ## 39.571 -5.647 ## ## ## $`6` ## ## Call: ## lm(formula = mpg ~ wt, data = .) ## ## Coefficients: ## (Intercept) wt ## 28.41 -2.78 ## ## ## $`8` ## ## Call: ## lm(formula = mpg ~ wt, data = .) ## ## Coefficients: ## (Intercept) wt ## 23.868 -2.192
# map(), another version.
# Fit mpg ~ wt on each piece of the split, then summarise every model.
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .)) %>%
  map(summary)
## $`4` ## ## Call: ## lm(formula = mpg ~ wt, data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.1513 -1.9795 -0.6272 1.9299 5.2523 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 39.571 4.347 9.104 7.77e-06 *** ## wt -5.647 1.850 -3.052 0.0137 * ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 3.332 on 9 degrees of freedom ## Multiple R-squared: 0.5086, Adjusted R-squared: 0.454 ## F-statistic: 9.316 on 1 and 9 DF, p-value: 0.01374 ## ## ## $`6` ## ## Call: ## lm(formula = mpg ~ wt, data = .) ## ## Residuals: ## Mazda RX4 Mazda RX4 Wag Hornet 4 Drive Valiant Merc 280 ## -0.1250 0.5840 1.9292 -0.6897 0.3547 ## Merc 280C Ferrari Dino ## -1.0453 -1.0080 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 28.409 4.184 6.789 0.00105 ** ## wt -2.780 1.335 -2.083 0.09176 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.165 on 5 degrees of freedom ## Multiple R-squared: 0.4645, Adjusted R-squared: 0.3574 ## F-statistic: 4.337 on 1 and 5 DF, p-value: 0.09176 ## ## ## $`8` ## ## Call: ## lm(formula = mpg ~ wt, data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -2.1491 -1.4664 -0.8458 1.5711 3.7619 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 23.8680 3.0055 7.942 4.05e-06 *** ## wt -2.1924 0.7392 -2.966 0.0118 * ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 2.024 on 12 degrees of freedom ## Multiple R-squared: 0.423, Adjusted R-squared: 0.3749 ## F-statistic: 8.796 on 1 and 12 DF, p-value: 0.01179
# map(), another version.
# Pull the "r.squared" element out of every model summary; the result is a list.
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .)) %>%
  map(summary) %>%
  map("r.squared")
## $`4` ## [1] 0.5086326 ## ## $`6` ## [1] 0.4645102 ## ## $`8` ## [1] 0.4229655
# map(), another version.
# Same extraction with map_dbl(), which returns a named numeric vector.
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .)) %>%
  map(summary) %>%
  map_dbl("r.squared")
## 4 6 8 ## 0.5086326 0.4645102 0.4229655
# map(), another version.
# Same extraction with map_df(), which returns a tibble with one column per group.
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .)) %>%
  map(summary) %>%
  map_df("r.squared")
## # A tibble: 1 × 3 ## `4` `6` `8` ## <dbl> <dbl> <dbl> ## 1 0.509 0.465 0.423
Usando data(txhousing, package = "ggplot2"), describe, mediante un modelo, la relación lineal entre sales y listings para cada categoría de year y extrae el valor de p.
lm(sales ~ listings)
Estadística y Manejo de Datos con R (EMDR) por
Marcos F. Rosetti S. y Luis Pacheco-Cobos se distribuye bajo una Licencia Creative Commons Atribución 4.0 Internacional.